# ---- Caption and Heading of File ---------------------------------------------
#                                                                              #
#        Replication script for original survey analysis.                      #
#        Not run on its own; invoked as a subroutine by a calling script.      #
#                                                                              #
################################################################################


# ----    Load Dataset             ---------------------------------------------

# Look up the settings for the requested case: the survey export to read, the
# state used for the target population, the two strike counts, and the jury
# size. `surveyToAnalze` must already be set by the caller; a value that
# matches none of the cases aborts the script.
survey_setup <- switch(surveyToAnalze,
  "Porter" = list(
    csv = "datasets/Porter survey data.csv",
    state = "FL", # Porter and Washington are FL cases
    pstrikes = 10, dstrikes = 10, jury_n = 12
  ),
  "Washington" = list(
    csv = "datasets/Washington survey data.csv",
    state = "FL", # Porter and Washington are FL cases
    pstrikes = 10, dstrikes = 10, jury_n = 12
  ),
  "Fulminante" = list(
    csv = "datasets/Fulminante survey data.csv",
    state = "AZ", # Fulminante is AZ case
    pstrikes = 10, dstrikes = 10, jury_n = 12
  ),
  "Hopkins" = list(
    csv = "datasets/Hopkins survey data.csv",
    state = "TX", # Hopkins is TX case
    pstrikes = 15, dstrikes = 15, jury_n = 12
  ),
  stop("Need to select valid survey to analyze.")
)

# Unpack into the variables the rest of the script reads.
surveydata <- read.csv(survey_setup[["csv"]])
thisState <- survey_setup[["state"]]
pstrikes <- survey_setup[["pstrikes"]]
dstrikes <- survey_setup[["dstrikes"]]
jury_n <- survey_setup[["jury_n"]]


# ---- Limit to Complete and Qualified Responses -------------------------------

# Keep only rows with a non-empty education answer and at least 90 on Progress
# (some rows are empty except ResponseId). Rows where the test evaluates to NA
# are dropped as well.
is_complete <- surveydata$d_educ != "" & surveydata$Progress >= 90
surveydata <- surveydata[is_complete & !is.na(is_complete), , drop = FALSE]
surveydata$respondent_na <- FALSE

# flag the respondents who meet juror qualifications
surveydata$isQualified <- identifyQualifiedJurors(surveydata)


# ---- Calculate Sampling Weights for Target Population ------------------------

# target-population demographics for the state this case belongs to
targetdata <- target.population.demographics(state = thisState)

# Logical indicators, one per weighting variable. Each is TRUE/FALSE from a
# direct comparison against the survey's answer labels.
surveydata$black <- (surveydata$d_race == "Black or African American")
surveydata$ba_or_more <- (
  surveydata$d_educ == "Bachelor's degree in college (4-year)" |
    surveydata$d_educ == "Doctoral degree" |
    surveydata$d_educ == "Master's degree" |
    surveydata$d_educ == "Professional degree (JD, MD)"
)
surveydata$inc50k_or_more <- (
  surveydata$d_income == "$50,000 to $59,999" |
    surveydata$d_income == "$60,000 to $69,999" |
    surveydata$d_income == "$70,000 to $79,999" |
    surveydata$d_income == "$80,000 to $89,999" |
    surveydata$d_income == "$90,000 to $99,999" |
    surveydata$d_income == "$100,000 to $149,999" |
    surveydata$d_income == "$150,000 or more"
)

# the original surveys used individual, rather than HH, income
surveydata$hhincome_over50k <- surveydata$inc50k_or_more # KLUDGE
# so the target's hhincome_over50k is overridden by hand with the share of
# individuals (not households) with income over $50k in each state
if (thisState == "FL") targetdata$hhincome_over50k <- .61 # Florida setting based on individual level incomes
if (thisState == "TX") targetdata$hhincome_over50k <- .49 # Texas setting
if (thisState == "AZ") targetdata$hhincome_over50k <- .50 # Arizona setting

# The original analysis used age 45 as the cutpoint; 35 is used here,
# presumably to match what target.population.demographics reports. Per the
# original note, the difference should be trivial.
#
# Some respondents entered birth state rather than birth year. If such a text
# entry is present, read.csv() loads the column as non-numeric, so convert it
# to numeric first; entries that are not numbers become NA. A column that is
# already numeric is left untouched.
if (!is.numeric(surveydata$d_yearborn)) {
  surveydata$d_yearborn <- suppressWarnings(
    as.numeric(as.character(surveydata$d_yearborn))
  )
}
# survey package functions can't handle missing values on weighting variables.
# if missing, assume they have average year born to keep them in sample
surveydata$d_yearborn[is.na(surveydata$d_yearborn)] <- mean(surveydata$d_yearborn, na.rm = TRUE)
# age is computed relative to 2018
surveydata$age35plus <- (2018 - surveydata$d_yearborn) >= 35
surveydata$woman <- (surveydata$d_gender == "Female")
surveydata$hispanic <- (surveydata$d_hispanic == "Yes")

# weights_for_population function adds weights to dataset and returns the dataset
surveydata <- weights_for_population(surveydata, targetdata)
surveydata$wt.raked <- surveydata$weights
# rescaled copy of the weights: divided by the mean weight among qualified
# respondents, so that the qualified subsample's weights average 1
surveydata$wt.raked.qual <- surveydata$wt.raked * (1 / mean(surveydata$wt.raked[surveydata$isQualified]))



# ---- Assess Representativeness of Sample Relative to Target Population -------

# Indicator columns whose sample means are compared against the target.
demog.variables <- c("black", "ba_or_more", "hhincome_over50k", "age35plus", "woman", "hispanic")

# Each summary below is a named numeric vector with one mean per indicator.
# vapply() walks the data-frame columns directly, so there is no intermediate
# matrix coercion as with apply().
# NOTE(review): `w =` is passed to wtd.mean exactly as in the original; if
# wtd.mean takes `weights`, this relies on partial argument matching -- confirm
# the signature before spelling it out.

# weighted sample
weightedSample <- vapply(
  surveydata[, demog.variables],
  function(x) wtd.mean(x, w = surveydata$wt.raked),
  numeric(1)
)
# unweighted sample
rawSample <- vapply(
  surveydata[, demog.variables],
  function(x) wtd.mean(x),
  numeric(1)
)
# qualified & weighted sample: rows and weights are both restricted to
# qualified respondents so they stay aligned
weightedQualifiedSample <- vapply(
  surveydata[surveydata$isQualified, demog.variables],
  function(x) wtd.mean(x, w = surveydata$wt.raked.qual[surveydata$isQualified]),
  numeric(1)
)
# sum(isQualified)
# the target data for comparison purposes
targetdata

# One row per indicator, one column per sample definition plus the target.
comparisonTable <- data.frame(t(rbind(rawSample, weightedSample, weightedQualifiedSample, targetdata)))
colnames(comparisonTable) <- c("Raw Sample", "Weighted Sample", "Weighted & Qualified", paste(thisState, "Population"))

# `verbose` is expected to be set by the caller.
if (verbose) {
  RCPA3:::headingbox(text = "Representative of Survey Sample")
  print(comparisonTable)
  cat("\n\n")
}


# ----    Compare Trial Outcomes by Trial Condition     ------------------------

# The "what if" verdict columns are named differently across studies. When
# if_actual_verdict / if_hypo_verdict are absent, fall back first to the
# Fulminante names (if_*cond_vote) and then to the Hopkins names (if_*ev).
if (is.null(surveydata$if_actual_verdict)) {
  surveydata$if_actual_verdict <- if (!is.null(surveydata$if_actualcond_vote)) {
    surveydata$if_actualcond_vote
  } else {
    surveydata$if_actualev
  }
}
if (is.null(surveydata$if_hypo_verdict)) {
  surveydata$if_hypo_verdict <- if (!is.null(surveydata$if_hypocond_vote)) {
    surveydata$if_hypocond_vote
  } else {
    surveydata$if_hypoev
  }
}

# Vote under each trial version: the respondent's own vote_verdict when they
# were randomized into that version, otherwise their if_*_verdict answer.
saw_actual <- surveydata$random_condition == "Actual trial"
saw_hypo <- surveydata$random_condition == "Hypothetical trial"
surveydata$vote_in_actual <- ifelse(saw_actual,
                                    surveydata$vote_verdict,
                                    surveydata$if_actual_verdict)
surveydata$vote_in_hypo <- ifelse(saw_hypo,
                                  surveydata$vote_verdict,
                                  surveydata$if_hypo_verdict)

# somehow an empty response submitted in Hopkins survey, should be NA
is.na(surveydata$vote_in_actual) <- which(surveydata$vote_in_actual == "")
is.na(surveydata$vote_in_hypo) <- which(surveydata$vote_in_hypo == "")

# Weighted frequency table of votes among qualified respondents, one per trial
# version. capture.output() swallows anything freqC() prints; only the returned
# table is kept.
invisible(capture.output({
  result_actual <- freqC(surveydata$vote_in_actual[surveydata$isQualified],
                         w = surveydata$wt.raked.qual[surveydata$isQualified],
                         plot = FALSE, digits = 1)
}))
invisible(capture.output({
  result_hypo <- freqC(surveydata$vote_in_hypo[surveydata$isQualified],
                       w = surveydata$wt.raked.qual[surveydata$isQualified],
                       plot = FALSE, digits = 1)
}))

# Proportion (0-1) taken from the "Percent" column of a frequency table: the
# "Death penalty" row when it has a value, otherwise the "Guilty" row.
# Relies on a lookup of an absent row name yielding NA rather than an error,
# as the original code did.
.verdict_share <- function(freq_table) {
  pct <- freq_table["Death penalty", "Percent"]
  if (is.na(pct)) {
    pct <- freq_table["Guilty", "Percent"]
  }
  pct / 100
}

pg_actual <- .verdict_share(result_actual)
n_actual  <- as.numeric(result_actual["Total", "Frequency"])

pg_hypo <- .verdict_share(result_hypo)
n_hypo  <- as.numeric(result_hypo["Total", "Frequency"])

# Juror level: compare the two verdict shares (and their totals) directly.
compare_juror_table <- compare.juror.stats(
  pg_actual, n_actual,
  pg_hypo, n_hypo,
  digits = 3
)
if (verbose) {
  RCPA3:::headingbox(text = "Effect on Juror-Level Preferences")
  print(compare_juror_table)
  cat("\n\n")
}

# Jury level: same inputs, plus the strike counts and jury size chosen for
# this case when the dataset was loaded.
compare_jury_table <- compare.jury.stats(
  pg_actual, n_actual,
  pg_hypo, n_hypo,
  digits = 3,
  pstrikes = pstrikes,
  dstrikes = dstrikes,
  jury_n = jury_n
)
if (verbose) {
  RCPA3:::headingbox(text = "Effect on Jury-Level Verdict Probabilities")
  print(compare_jury_table)
  cat("\n\n")
}

# Everything the calling script needs, gathered into one named list.
survey_analysis_results <- list(
  comparisonTable = comparisonTable,
  result_actual = result_actual,
  result_hypo = result_hypo,
  compare_juror_table = compare_juror_table,
  compare_jury_table = compare_jury_table
)

